In [28]:
# If R packages are not installed, try:
#install.packages("BiocInstaller",
#  repos="http://bioconductor.org/packages/3.1/bioc")
#source("http://bioconductor.org/biocLite.R")
#biocLite("DESeq2")
#install.packages("RColorBrewer",contriburl = contrib.url("http://cran.us.r-project.org"))
#install.packages("gplots",contriburl = contrib.url("http://cran.us.r-project.org"))
In [29]:
# load necessary R packages
library(DESeq2)
library(RColorBrewer)
library(gplots)
In [30]:
# source these scripts:
source('plotPCAWithSampleNames.R')
source('overLapper_original.R')
In [31]:
# load the data:
data.1<-read.csv("killifish_allcounts.csv")
# show sample names:
#colnames(data.1)
id<-data.1$GeneID
rownames(data.1)<-id
# show first 6 rows of counts data:
#head(data.1)
In [32]:
# load annotation file
annotation<-read.table("kfish2rae5g.annotation.transcript.name.id", fill=TRUE,header=FALSE)
colnames(annotation)<-c("id","gene")
#head(annotation)
# need to fix this so all gene name words are in one column
In [33]:
# separate only F_heteroclitus.MDPP data
F_heteroclitus.MDPP<-data.1[,c(50:58)]
colnames(F_heteroclitus.MDPP)
col.names<-colnames(F_heteroclitus.MDPP)
head(F_heteroclitus.MDPP)
Out[33]:
  1. 'F_heteroclitus_MDPP_BW_1'
  2. 'F_heteroclitus_MDPP_BW_2'
  3. 'F_heteroclitus_MDPP_BW_3'
  4. 'F_heteroclitus_MDPP_FW_1'
  5. 'F_heteroclitus_MDPP_FW_2'
  6. 'F_heteroclitus_MDPP_FW_3'
  7. 'F_heteroclitus_MDPP_transfer_1'
  8. 'F_heteroclitus_MDPP_transfer_2'
  9. 'F_heteroclitus_MDPP_transfer_3'
Out[33]:
F_heteroclitus_MDPP_BW_1F_heteroclitus_MDPP_BW_2F_heteroclitus_MDPP_BW_3F_heteroclitus_MDPP_FW_1F_heteroclitus_MDPP_FW_2F_heteroclitus_MDPP_FW_3F_heteroclitus_MDPP_transfer_1F_heteroclitus_MDPP_transfer_2F_heteroclitus_MDPP_transfer_3
Funhe2EKm000003t1116101004
Funhe2EKm000004t112019870169641583772129
Funhe2EKm000005t10401502410
Funhe2EKm000006t1514640465344
Funhe2EKm000007t1131314001
Funhe2EKm000008t14319576112560819
In [34]:
conditions = sapply(strsplit(col.names,"_"),`[`,4)
genus = sapply(strsplit(col.names,"_"),`[`,1)
species = sapply(strsplit(col.names,"_"),`[`,2)
genus_species = paste(genus,species,sep="_")
pop = sapply(strsplit(col.names,"_"),`[`,3)
genus_species_pop = paste(genus_species,pop,sep=".")
genus_species = gsub(".NA", "", genus_species_pop)
ExpDesign <- data.frame(row.names=colnames(F_heteroclitus.MDPP), condition = conditions,genus_species = genus_species)
ExpDesign
Out[34]:
conditiongenus_species
F_heteroclitus_MDPP_BW_1BWF_heteroclitus.MDPP
F_heteroclitus_MDPP_BW_2BWF_heteroclitus.MDPP
F_heteroclitus_MDPP_BW_3BWF_heteroclitus.MDPP
F_heteroclitus_MDPP_FW_1FWF_heteroclitus.MDPP
F_heteroclitus_MDPP_FW_2FWF_heteroclitus.MDPP
F_heteroclitus_MDPP_FW_3FWF_heteroclitus.MDPP
F_heteroclitus_MDPP_transfer_1transferF_heteroclitus.MDPP
F_heteroclitus_MDPP_transfer_2transferF_heteroclitus.MDPP
F_heteroclitus_MDPP_transfer_3transferF_heteroclitus.MDPP
In [35]:
cds<-DESeqDataSetFromMatrix(countData=F_heteroclitus.MDPP, 
                            colData=ExpDesign,design= ~ condition)
cds<-DESeq(cds, betaPrior=FALSE)
log_cds<-rlog(cds)
plotPCAWithSampleNames(log_cds,intgroup="condition",ntop=40000)
estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
In [36]:
res.1<-results(cds,contrast=c("condition","BW","FW"))
res.2<-results(cds,contrast=c("condition","transfer","FW"))
res.3<-results(cds,contrast=c("condition","transfer","BW"))
res1_ordered <-as.data.frame(res.1[order(res.1$padj),])
res1_filtered <-subset(res1_ordered,res1_ordered$padj<0.05)
res1_filtered <-subset(res1_filtered,res1_filtered$log2FoldChange>1 | res1_filtered$log2FoldChange< -1)
id<-rownames(res1_filtered)
res1_filtered<-cbind(res1_filtered,id)
dim(res1_filtered)
res2_ordered <-as.data.frame(res.2[order(res.2$padj),])
res2_filtered<-subset(res2_ordered,res2_ordered$padj<0.05)
res2_filtered <-subset(res2_filtered,res2_filtered$log2FoldChange>1 | res2_filtered$log2FoldChange< -1)
id<-rownames(res2_filtered)
res2_filtered<-cbind(res2_filtered,id)
dim(res2_filtered)
res3_ordered<-as.data.frame(res.3[order(res.3$padj),])
res3_filtered<-subset(res3_ordered,res3_ordered$padj<0.05)
res3_filtered <-subset(res3_filtered,res3_filtered$log2FoldChange>1 | res3_filtered$log2FoldChange< -1)
id<-rownames(res3_filtered)
res3_filtered<-cbind(res3_filtered,id)
dim(res3_filtered)
Out[36]:
  1. 21
  2. 7
Out[36]:
  1. 21
  2. 7
Out[36]:
  1. 33
  2. 7
In [37]:
# get normalized counts
# add id column
F_heteroclitus.MDPP_norm_counts<-counts(cds,normalized=TRUE)
id<-rownames(F_heteroclitus.MDPP_norm_counts)
F_heteroclitus.MDPP_norm_counts<-cbind(F_heteroclitus.MDPP_norm_counts,id)
In [38]:
# merge res1, res2, res3 with counts
# "BW","FW"
res1_df<-as.data.frame(res.1)
colnames(res1_df)<-paste(colnames(res1_df),"BW_FW", sep='.')
id<-rownames(res1_df)
res1_df<-cbind(res1_df,id)
# "transfer","FW"
res2_df<-as.data.frame(res.2)
colnames(res2_df)<-paste(colnames(res2_df),"transfer_FW", sep='.')
id<-rownames(res2_df)
res2_df<-cbind(res2_df,id)
# "transfer","BW"
res3_df<-as.data.frame(res.3)
colnames(res3_df)<-paste(colnames(res3_df),"transfer_BW", sep='.')
id<-rownames(res3_df)
res3_df<-cbind(res3_df,id)
F_heteroclitus.MDPP_res<-merge(F_heteroclitus.MDPP_norm_counts,res1_df,by="id")
F_heteroclitus.MDPP_res<-merge(F_heteroclitus.MDPP_res,res2_df,by="id")
F_heteroclitus.MDPP_res<-merge(F_heteroclitus.MDPP_res,res3_df,by="id")
dim(F_heteroclitus.MDPP_res)
F_heteroclitus.MDPP_res<-F_heteroclitus.MDPP_res[complete.cases(F_heteroclitus.MDPP_res),]
dim(F_heteroclitus.MDPP_res)
F_heteroclitus.MDPP_annotated<-merge(F_heteroclitus.MDPP_res,annotation,by="id")
F_heteroclitus.MDPP_annotated<-F_heteroclitus.MDPP_annotated[,c(ncol(F_heteroclitus.MDPP_annotated),1:(ncol(F_heteroclitus.MDPP_annotated)-1))]
#write.csv(F_heteroclitus.MDPP_annotated,"F_heteroclitus.MDPP_results_all.csv")
Out[38]:
  1. 32658
  2. 28
Out[38]:
  1. 10755
  2. 28
In [39]:
plot(log2(res.1$baseMean), res.1$log2FoldChange, 
     col=ifelse(res.1$padj < 0.05, "red","gray67"),
     main="F. heteroclitus.MDPP (BW vs. FW) (padj<0.05)",xlim=c(1,15),pch=20,cex=1)
abline(h=c(-1,1), col="blue")
In [40]:
plot(log2(res.2$baseMean), res.2$log2FoldChange, 
     col=ifelse(res.2$padj < 0.05, "red","gray67"),
     main="F. heteroclitus.MDPP (transfer vs. FW) (padj<0.05)",xlim=c(1,15),pch=20,cex=1)
abline(h=c(-1,1), col="blue")
In [41]:
plot(log2(res.3$baseMean), res.3$log2FoldChange, 
     col=ifelse(res.3$padj < 0.05, "red","gray67"),
     main="F. heteroclitus.MDPP (transfer vs. BW) (padj<0.05)",xlim=c(1,15),pch=20,cex=1)
abline(h=c(-1,1), col="blue")
In [42]:
m<-res1_filtered$id
length(m)
n<-res2_filtered$id
length(n)
o<-res3_filtered$id
length(o)
setlist <- list(BW_FW=as.vector(m),transfer_FW=as.vector(n),transfer_BW=as.vector(o))
OLlist <- overLapper(setlist=setlist, sep="", type="vennsets")
counts <- sapply(OLlist$Venn_List, length)
vennPlot(counts=counts)
Out[42]:
21
Out[42]:
21
Out[42]:
33
In [43]:
# extract intersections:
names(OLlist$Venn_List)
overlap_BW_FWtransfer_FW<-OLlist$Venn_List$BW_FWtransfer_FW
length(overlap_BW_FWtransfer_FW)
overlap_BW_FWtransfer_BW<-OLlist$Venn_List$BW_FWtransfer_BW
length(overlap_BW_FWtransfer_BW)
overlap_transfer_FWtransfer_BW<-OLlist$Venn_List$transfer_FWtransfer_BW
length(overlap_transfer_FWtransfer_BW)

# get lists of unique genes for each comparison
F_heteroclitus.MDPP_BW_FW<-OLlist$Venn_List$BW_FW
length(F_heteroclitus.MDPP_BW_FW)
F_heteroclitus.MDPP_transfer_FW<-OLlist$Venn_List$transfer_FW
length(F_heteroclitus.MDPP_transfer_FW)
F_heteroclitus.MDPP_transfer_BW<-OLlist$Venn_List$transfer_BW
length(F_heteroclitus.MDPP_transfer_BW)
Out[43]:
  1. 'BW_FW'
  2. 'transfer_FW'
  3. 'transfer_BW'
  4. 'BW_FWtransfer_FW'
  5. 'BW_FWtransfer_BW'
  6. 'transfer_FWtransfer_BW'
  7. 'BW_FWtransfer_FWtransfer_BW'
Out[43]:
7
Out[43]:
5
Out[43]:
6
Out[43]:
9
Out[43]:
8
Out[43]:
22
In [ ]: